*** This file runs regressions to generate estimates of average effects across different specifications.
*** Here we use the CEP policy categories and 2019 as the post period.
*** Switcher states are dropped. The treated group is the least-skilled sample.
*** Testing bootstrapping while absorbing all dummies.





* Reset the session: close any open log, clear memory, and turn off paging and tracing.
capture log close
clear all
set more off
set trace off

* Set path. The global named path must point to the directory where the replication
* files are stored on your computer. Either define it before running this file or
* uncomment and edit the line below.
*global path "I:/DataSets5/Duncan/Dropbox/Recent Minimum Wage Changes/2020.12 NBER Update/JOLE Precommittment Replication"

* Stop early if the root directory is undefined. Otherwise every directory below
* would silently resolve to a root-relative location such as "/Data".
if `"$path"' == "" {
	display as error "global path is not defined; set it to the replication root directory before running this file"
	exit 198
}

* Sub-directories of the replication root
global dtadir "$path/Data"
global tabdir "$path/Tables"
global figdir "$path/Figures"
global estdir "$path/Estimates"
global logdir "$path/Logfiles"


log using "$logdir/boot-emp-cep-lowskill-noswitchers-2019-2019-seed-789012.log", replace


* Set seed so that the bootstrap resamples are reproducible
set seed 789012

* 2) Define bootstrap program for stratified sampling
*
* bs_strat fits 16 reghdfe employment regressions on the data in memory
* (8 on observations with acs == 1 and 8 on observations with cps == 1) and
* returns the treatment coefficients averaged across specifications.
*
* Options (all required by the syntax statement; none of them is referenced in
* the program body, so they only record which configuration is being run):
*   postmin(real) categories(string) sample(string) switchers(string)
*
* Returned results: 12 scalars named r(mean_emp_beta_SRC_GRP) with
*   SRC = all, acs, cps   and   GRP = all, large, small, indexer
*
* The coefficients are accumulated in temporary scalars, so the data in memory
* are never modified and no preserve/restore is needed. Averages are computed
* in double precision; earlier versions stored each coefficient in a float
* variable, so results can differ from them at float-rounding level only.
*
* The variable stateid used for the fixed effects and clustering is not created
* in this file; the bootstrap call is expected to supply it through idcluster().
capture program drop bs_strat
program define bs_strat, rclass

	*------------------------ 1.1 Syntax --------------------------

	syntax, postmin(real) categories(string) sample(string) switchers(string)


	*------------------------ 1.2 Set Up --------------------------

	* Running sums of the treatment coefficients and a count of the
	* specifications estimated, kept separately for each survey.
	tempname sum_acs_large sum_acs_small sum_acs_index
	tempname sum_cps_large sum_cps_small sum_cps_index
	foreach src in acs cps {
		foreach grp in large small index {
			scalar `sum_`src'_`grp'' = 0
		}
		local n_`src' = 0
	}

	* Right-hand sides shared by all DD (specs 1-6) and DDD (specs 7-8) models
	local dd_rhs  i.StatIncreaserLarge##i.post i.StatIncreaserSmall##i.post i.indexer##i.post
	local ddd_rhs i.Affect_StatLg_Post i.Affect_StatSm_Post i.Affect_Index_Post


	*------------------------ 1.3 DD and DDD Regression --------------------------

	foreach src in acs cps {

		* Absorbed fixed effects differ by survey: the ACS models use year
		* effects, the CPS models use month-by-year effects.
		if "`src'" == "acs" {
			local dd_fe  i.year i.stateid
			local ddd_fe i.stateid##i.time i.affected##i.year i.affected##i.stateid
		}
		else {
			local dd_fe  i.month##i.year i.stateid
			local ddd_fe i.stateid##i.time i.affected##i.time i.affected##i.stateid
		}

		forvalues spec = 1/8 {

			* Spec 1: baseline.  Specs 2-4: one macro control each.
			* Spec 5: age and education effects.  Spec 6: all controls plus
			* age and education effects.  Spec 7: DDD.  Spec 8: DDD plus age
			* and education effects.
			local controls
			local extra_fe
			if `spec' == 2 local controls lnPersonalIncome
			if `spec' == 3 local controls HPI
			if `spec' == 4 local controls stateempE
			if `spec' == 6 local controls lnPersonalIncome HPI stateempE
			if inlist(`spec', 5, 6, 8) local extra_fe i.age i.educ

			if `spec' <= 6 {
				* DD models: least-skilled sample only
				reghdfe employed `dd_rhs' `controls' [aw=perwt] if lowskill == 1 & `src' == 1, absorb(`dd_fe' `extra_fe') cluster(stateid) compact
				scalar `sum_`src'_large' = `sum_`src'_large' + _b[1.StatIncreaserLarge#1.post]
				scalar `sum_`src'_small' = `sum_`src'_small' + _b[1.StatIncreaserSmall#1.post]
				scalar `sum_`src'_index' = `sum_`src'_index' + _b[1.indexer#1.post]
			}
			else {
				* DDD models: all observations of the survey that are in memory
				reghdfe employed `ddd_rhs' [aw=perwt] if `src' == 1, absorb(`ddd_fe' `extra_fe') cluster(stateid) compact
				scalar `sum_`src'_large' = `sum_`src'_large' + _b[1.Affect_StatLg_Post]
				scalar `sum_`src'_small' = `sum_`src'_small' + _b[1.Affect_StatSm_Post]
				scalar `sum_`src'_index' = `sum_`src'_index' + _b[1.Affect_Index_Post]
			}

			local n_`src' = `n_`src'' + 1
		}
	}


	*------------------------ 1.4 Average and return --------------------------

	* Average coefficients across models and return one scalar per
	* survey / policy-group combination for each bootstrap replication.

	* ACS and CPS separately
	foreach src in acs cps {
		return scalar mean_emp_beta_`src'_large   = `sum_`src'_large' / `n_`src''
		return scalar mean_emp_beta_`src'_small   = `sum_`src'_small' / `n_`src''
		return scalar mean_emp_beta_`src'_indexer = `sum_`src'_index' / `n_`src''
		return scalar mean_emp_beta_`src'_all     = (`sum_`src'_large' + `sum_`src'_small' + `sum_`src'_index') / (3 * `n_`src'')
	}

	* All samples pooled
	local n_all = `n_acs' + `n_cps'
	return scalar mean_emp_beta_all_large   = (`sum_acs_large' + `sum_cps_large') / `n_all'
	return scalar mean_emp_beta_all_small   = (`sum_acs_small' + `sum_cps_small') / `n_all'
	return scalar mean_emp_beta_all_indexer = (`sum_acs_index' + `sum_cps_index') / `n_all'
	return scalar mean_emp_beta_all_all     = (`sum_acs_large' + `sum_acs_small' + `sum_acs_index' + `sum_cps_large' + `sum_cps_small' + `sum_cps_index') / (3 * `n_all')
end
				   


*****************************************************************************************************
************					   3. Set up data							            *************
***************************************************************************************************** 

tempfile acs cps

*** Assemble relevant years of the ACS
use "$dtadir/ACS-2019.dta", clear

keep if year >= 2011

*** Keep ages 16 through 64 and drop records with empstat equal to 0
keep if age >= 16 & age < 65
keep if empstat != 0

*** Construct economic outcomes of interest: employed is 1 when empstat equals 1
gen employed = (empstat == 1)

**** Construct education indicators from the educ codes
gen dropout     = (educ < 6)
gen highschool  = (educ == 6)
gen somecollege = (educ >= 7 & educ < 10)
gen collegeplus = (educ >= 10)

*** Merge in HPI data by state and year, keeping only records present in the ACS
merge m:1 statefip year using "$dtadir/HPI_acs_2019.dta", keep(master match) nogenerate

replace HPI = HPI/1000

*** Merge in personal income data by state and year, keeping only records present in the ACS
merge m:1 statefip year using "$dtadir/PersonalIncome_acs_2019.dta", keep(master match) nogenerate
gen lnPersonalIncome = ln(PersonalIncome)

* The ACS time index is the survey year
gen time = year

** Mid-skill comparison group used for the state employment-rate control
gen group = (age > 21 & age <= 30 & highschool == 1) | (age > 30 & age <= 45 & dropout == 1) | (age > 45 & age < 65 & dropout == 1)

* Employment rate of the comparison group by state and year, spread to every
* observation in the same state-year cell
egen stateempD = mean(employed) if group == 1, by(year statefip)
egen stateempE = max(stateempD), by(year statefip)

* Analysis samples
gen lowskill = (inrange(age,16,25) & dropout == 1)
gen young    = inrange(age,16,21)
gen primeage = inrange(age,26,54)

keep if lowskill == 1 | young == 1 | primeage == 1

* Survey indicators
gen acs = 1
gen cps = 0

keep employed year statefip lnPersonalIncome HPI educ age stateempE lowskill young primeage acs cps perwt time

compress

save `acs', replace



*** Assemble relevant years of the basic monthly CPS
use "$dtadir/CPS-2019.dta", clear

keep if year >= 2011

*** Keep ages 16 through 64 and drop records with empstat equal to 0
keep if age >= 16 & age < 65
keep if empstat != 0

*** Construct economic outcomes of interest
* empstat = 10: "At work"
* empstat = 12: "Employed, not at work last week"
* empstat = 1 : armed forces, assumed to be employed
gen employed = inlist(empstat, 1, 10, 12)

**** Construct education indicators from the educ codes
gen dropout     = (educ < 73)
gen highschool  = (educ == 73)
gen somecollege = (educ >= 81 & educ <= 92)
gen collegeplus = (educ >= 111)

* Calendar quarter of the survey month (missing when month is not 1 through 12)
gen quarter = ceil(month/3) if inlist(month,1,2,3,4,5,6,7,8,9,10,11,12)

* The CPS time index is year and month combined as YYYYMM
gen time = (100*year) + month

*** Merge in HPI data by state, year and quarter, keeping only records present in the CPS
merge m:1 statefip year quarter using "$dtadir/HPI_2019.dta", keep(master match) nogenerate

replace HPI = HPI/1000

*** Merge in personal income data by state, year and quarter, keeping only records present in the CPS
merge m:1 statefip year quarter using "$dtadir/PersonalIncome_2019.dta", keep(master match) nogenerate
gen lnPersonalIncome = ln(PersonalIncome)

** Mid-skill comparison group used for the state employment-rate control
gen group = (age > 21 & age <= 30 & highschool == 1) | (age > 30 & age <= 45 & dropout == 1) | (age > 45 & age < 65 & dropout == 1)

* Employment rate of the comparison group by state, year and month, spread to
* every observation in the same cell
egen stateempD = mean(employed) if group == 1, by(year month statefip)
egen stateempE = max(stateempD), by(year month statefip)

* Analysis samples
gen lowskill = (inrange(age,16,25) & dropout == 1)
gen young    = inrange(age,16,21)
gen primeage = inrange(age,26,54)

keep if lowskill == 1 | young == 1 | primeage == 1

* Survey indicators
gen acs = 0
gen cps = 1

* Use the CPS final weight under the same name as the ACS person weight
gen perwt = wtfinl

keep employed year month statefip lnPersonalIncome HPI time educ age stateempE lowskill primeage young acs cps perwt

compress

* Stack the ACS extract saved earlier underneath the CPS records
append using `acs'

* Post indicator: 0 for 2011-2013, 1 for 2019, missing for every other year
capture drop post
gen post = cond(inrange(year,2011,2013), 0, cond(year == 2019, 1, .))

* Keep only needed observations to reduce memory: pre/post years, and the
* least-skilled and prime-age samples
keep if !missing(post)
keep if (lowskill == 1 | primeage == 1)


* Merge in policy categories.
* NOTE(review): no keep() option is given, so states found only in the minimum
* wage file would be added as extra rows. Confirm every state there is also in the data.
capture drop originaltype-increase5
merge m:1 statefip using "$dtadir/min_wage_variables_for_ACS_and_CPS_analysis.dta", nogenerate keepusing(originaltype jan*min)

capture drop indexer StatIncreaserLarge StatIncreaserSmall statutoryincreasein2014or2015 statutoryincreasein2014to2017 statutoryincreasein2014to2018

* CEP Categories
* NOTE(review): Stata orders missing values above every number, so a missing
* minimum wage would satisfy the "> 0" and ">= 1" tests below. Confirm the
* jan2013min, jan2015min and jan2016min variables have no missing values.
gen indexer = (originaltype == "Indexer")
gen StatIncreaserLarge = (indexer == 0 & (jan2015min - jan2013min) >= 1 & (jan2016min - jan2013min) != .)
gen StatIncreaserSmall = 0
gen statutoryincreasein2014or2015 = ((jan2016min - jan2013min) > 0 & indexer == 0)
gen statutoryincreasein2014to2018 = 0
* Small increasers are the statutory increasers that are not large increasers
replace StatIncreaserSmall = 1 if indexer == 0 & statutoryincreasein2014or2015 == 1 & StatIncreaserLarge == 0

* Generate variables for DDD regressions: affected is 0 for prime-age records,
* 1 for the remaining least-skilled records, and missing otherwise
gen affected = cond(primeage == 1, 0, cond(lowskill == 1, 1, .))

gen Affect_Index_Post  = affected*indexer*post
gen Affect_StatLg_Post = affected*StatIncreaserLarge*post
gen Affect_StatSm_Post = affected*StatIncreaserSmall*post

* Drop switcher states
keep if !inlist(statefip,4,8,23,29,41,50,53)

* Generate policygroup variable for doing proportional sampling correctly:
* 4 = large increaser, 3 = small increaser, 2 = indexer, 1 = everything else
gen policygroup = cond(StatIncreaserLarge == 1, 4, cond(StatIncreaserSmall == 1, 3, cond(indexer == 1, 2, 1)))

compress


cd "$estdir/Bootstrap/seed-789012"

* Build the list of bootstrap statistics: for all samples pooled, the ACS and
* the CPS, collect the mean across all changer states, large increasers, small
* increasers, and indexers returned by bs_strat.
local bs_exprs
foreach src in all acs cps {
	local bs_exprs `bs_exprs' emp_beta_`src'_allchange=r(mean_emp_beta_`src'_all)
	local bs_exprs `bs_exprs' emp_beta_`src'_large=r(mean_emp_beta_`src'_large)
	local bs_exprs `bs_exprs' emp_beta_`src'_small=r(mean_emp_beta_`src'_small)
	local bs_exprs `bs_exprs' emp_beta_`src'_index=r(mean_emp_beta_`src'_indexer)
}

timer clear
timer on 1

* Run bootstrap command for stratified sampling: states are resampled as
* clusters within policy group, and each resampled state receives a unique
* identifier in stateid.

* Policy categories: cep. Sample: lowskill. Switcher states: noswitchers. Post start: 2019 post end: 2019.
bootstrap `bs_exprs', ///
	reps(100) noisily cluster(statefip) strata(policygroup) idcluster(stateid) ///
	saving("boot-emp-cep-lowskill-noswitchers-2019-2019", replace): ///
	bs_strat, postmin(2019) categories(cep) sample(lowskill) switchers(noswitchers)


* Save data from original call with data as is: copy the r(table) matrix left
* by bootstrap into variables named after its columns.
matrix res = r(table)
svmat res, names(col)

* Name the first six rows of the table; later rows keep an empty label and are
* dropped before saving.
gen stat = ""
gen n = _n
local row = 0
foreach label in beta boot_se z-score pval lo_ci hi_ci {
	local ++row
	replace stat = "`label'" if n == `row'
}

keep emp_beta_all_allchange-stat

* Tag the table with the configuration that produced it
capture drop policycat sample switchers postmin postmax
gen policycat = "cep"
gen sample    = "lowskill"
gen switchers = "noswitchers"
gen postmin   = 2019
gen postmax   = 2019
gen iteration = _n

keep if !missing(stat)

save "coef-emp-cep-lowskill-noswitchers-2019-2019.dta", replace

* Add identifying variables and labels to the file of bootstrap replications
* written by the saving() option of the bootstrap call.
use "boot-emp-cep-lowskill-noswitchers-2019-2019.dta", clear
cap drop policycat sample switchers postmin postmax
gen policycat = "cep"
gen sample = "lowskill"
gen switchers = "noswitchers"
gen postmin = 2019
gen postmax = 2019
* One row per replication, numbered in the order stored in the file
gen iteration = _n

label var policycat "CEP or New Policy Categories"
label var sample "Least-Skilled or Young Sample"
label var switchers "Switchers or No Switchers"
label var postmin "First Year of Post Period"
label var postmax "Last Year Post Period"
label var iteration "Bootstrap Iteration Number"

* Write the labelled replications back to the file they were read from. The
* file name previously began with an undefined global macro, which expands to
* nothing, so the labelled data were written to a file whose name started with
* "-emp-" and the bootstrap file itself was left unlabelled.
save "boot-emp-cep-lowskill-noswitchers-2019-2019.dta", replace

di "Policy categories: cep. Sample: lowskill. Switcher states: noswitchers. Post start: 2019 post end: 2019. Done" 


timer off 1
timer list 1
log close

